Setup


In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Generating sample data: a pandas Series and a DataFrame of random values


In [63]:
# Sample data for the examples below: a Series with one missing value and a
# 6x5 DataFrame of standard-normal draws indexed by six consecutive days.
# A dedicated, seeded generator keeps `df` identical across kernel restarts
# without touching NumPy's global random state.
rng = np.random.RandomState(0)
s = pd.Series([1, 3, 5, np.nan, 6, 8])
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(rng.randn(6, 5), index=dates, columns=list('ABCDE'))

In [62]:
# Scratch check: draw a 6x4 array of standard-normal samples (the result is
# displayed, not stored).
np.random.randn(6,4)


Out[62]:
array([[-0.72141978,  1.75647938, -0.84297956, -1.87490759],
       [ 0.61284124,  0.88152499,  0.23692212, -0.46196475],
       [ 0.49427992,  0.83530742, -1.06899914, -0.40358203],
       [ 1.26489497, -1.06422262, -1.55284967,  1.16017867],
       [-0.2598227 ,  0.86774058, -0.64080989,  0.75744095],
       [-0.72803805,  0.72472441,  0.81294184,  0.24446359]])

In [30]:
# Build a frame from a mapping of column name -> values. Each column gets a
# different dtype; the scalar entries (A, B, E) are repeated to match the
# four-element array-like columns (C, D).
df2 = pd.DataFrame(dict(
    A=1.,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=list(range(4)), dtype='float32'),
    D=np.array([3] * 4, dtype='int32'),
    E='foo',
))

In [31]:
# Display the frame to confirm the scalar entries were repeated on every row.
df2


Out[31]:
A B C D E
0 1 2013-01-02 00:00:00 1 3 foo
1 1 2013-01-02 00:00:00 1 3 foo
2 1 2013-01-02 00:00:00 1 3 foo
3 1 2013-01-02 00:00:00 1 3 foo

4 rows × 5 columns


In [32]:
# List the dtype pandas assigned to each column of df2.
df2.dtypes


Out[32]:
A           float64
B    datetime64[ns]
C           float32
D             int32
E            object
dtype: object

In [64]:
# Preview the first five rows (the default for head()).
df.head()


Out[64]:
A B C D E
2013-01-01 1.100527 -0.381699 -0.451535 0.713584 -1.293972
2013-01-02 -0.391748 -1.480627 0.590120 -0.640268 -0.440093
2013-01-03 0.661144 0.361374 -0.882714 1.120732 0.276473
2013-01-04 -1.130035 -0.168796 0.072885 0.875275 0.575093
2013-01-05 2.405335 -0.583955 -0.690658 -2.137181 -1.716164

5 rows × 5 columns


In [37]:
# The row labels: the daily DatetimeIndex that was passed in as index=dates.
df.index


Out[37]:
<class 'pandas.tseries.index.DatetimeIndex'>
[2013-01-01 00:00:00, ..., 2013-01-06 00:00:00]
Length: 6, Freq: D, Timezone: None

In [41]:
# Top-left cell of the underlying NumPy array, read with a single
# two-dimensional index rather than two chained lookups.
df.values[0, 0]


Out[41]:
0.26366085386647958

In [42]:
# Transpose: the column labels become the row index and the dates become columns.
df.T


Out[42]:
2013-01-01 00:00:00 2013-01-02 00:00:00 2013-01-03 00:00:00 2013-01-04 00:00:00 2013-01-05 00:00:00 2013-01-06 00:00:00
A 0.263661 -1.148435 1.753792 0.771710 -0.456610 -0.375286
B 1.263971 -0.750341 -0.436259 1.207909 0.944913 -2.548341
C -0.039726 -0.228849 0.566032 1.505498 0.573583 0.647772
D 0.352130 0.991504 0.392104 -1.446948 -1.511025 2.033515

4 rows × 6 columns


In [44]:
# Order rows by column B, largest first. `DataFrame.sort(columns=...)` was
# removed from pandas; `sort_values(by=...)` is its replacement and likewise
# returns a new frame, leaving `df` itself untouched.
df.sort_values(by='B', ascending=False)


Out[44]:
A B C D
2013-01-01 0.263661 1.263971 -0.039726 0.352130
2013-01-04 0.771710 1.207909 1.505498 -1.446948
2013-01-05 -0.456610 0.944913 0.573583 -1.511025
2013-01-03 1.753792 -0.436259 0.566032 0.392104
2013-01-02 -1.148435 -0.750341 -0.228849 0.991504
2013-01-06 -0.375286 -2.548341 0.647772 2.033515

6 rows × 4 columns


In [48]:
# Selecting a single column by label returns it as a Series.
df['A']


Out[48]:
2013-01-01    0.263661
2013-01-02   -1.148435
2013-01-03    1.753792
2013-01-04    0.771710
2013-01-05   -0.456610
2013-01-06   -0.375286
Freq: D, Name: A, dtype: float64

In [51]:
# Label-based selection: every row (:), only columns A and B.
df.loc[:,['A','B']]


Out[51]:
A B
2013-01-01 0.263661 1.263971
2013-01-02 -1.148435 -0.750341
2013-01-03 1.753792 -0.436259
2013-01-04 0.771710 1.207909
2013-01-05 -0.456610 0.944913
2013-01-06 -0.375286 -2.548341

6 rows × 2 columns


In [52]:
# Label-based access to a single scalar: row dates[0], column 'A'.
df.at[dates[0],'A']


Out[52]:
0.26366085386647958

In [53]:
# Position-based slicing: rows 3-4 and columns 0-1 (the end of each slice is
# excluded).
df.iloc[3:5,0:2]


Out[53]:
A B
2013-01-04 0.77171 1.207909
2013-01-05 -0.45661 0.944913

2 rows × 2 columns


In [54]:



Out[54]:
A B C D
2013-01-01 0.263661 1.263971 -0.039726 0.352130
2013-01-02 -1.148435 -0.750341 -0.228849 0.991504
2013-01-03 1.753792 -0.436259 0.566032 0.392104
2013-01-04 0.771710 1.207909 1.505498 -1.446948
2013-01-05 -0.456610 0.944913 0.573583 -1.511025
2013-01-06 -0.375286 -2.548341 0.647772 2.033515

6 rows × 4 columns


In [57]:
# Position-based slicing: rows 1-2, all columns.
df.iloc[1:3,:]


Out[57]:
A B C D
2013-01-02 -1.148435 -0.750341 -0.228849 0.991504
2013-01-03 1.753792 -0.436259 0.566032 0.392104

2 rows × 4 columns


In [58]:
# Position-based scalar access (row 1, column 1); .iat is the fast path when
# only a single value is needed.
df.iat[1,1]


Out[58]:
-0.75034058387714442

In [ ]: